# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class GkSpider(CrawlSpider):
    """Crawl the guokr.com highlighted-questions listing and scrape questions.

    The crawl starts at the ``ask/highlight/`` listing, follows its
    pagination links, and hands every linked question page to
    :meth:`parse_item`.
    """

    name = 'gk'
    allowed_domains = ['guokr.com']
    start_urls = ['https://www.guokr.com/ask/highlight/']

    rules = (
        # Pagination links of the listing: followed for more links, no callback.
        Rule(LinkExtractor(allow=r'ask/highlight/\?page=\d+'), follow=True),
        # Individual question pages: parsed by ``parse_item``.
        Rule(LinkExtractor(allow=r'question/\d+'), callback='parse_item'),
    )

    def parse_item(self, response):
        """Extract the title and description from a question page.

        Args:
            response: The downloaded question page.

        Returns:
            dict: ``title`` holds the first text node of
            ``<h1 id="articleTitle">`` and ``desc`` the first text node of a
            ``<p>`` directly under ``<div id="questionDesc">``. Either value
            is ``None`` when the XPath matches nothing.
        """
        item = {
            'title': response.xpath(
                '//h1[@id="articleTitle"]/text()'
            ).extract_first(),
            'desc': response.xpath(
                '//div[@id="questionDesc"]/p/text()'
            ).extract_first(),
        }
        # Return the item instead of printing it: a callback that returns
        # nothing yields no scraped items, so item pipelines and feed
        # exports would never receive the data.
        return item